* START_312113_ICE_2002_07.SAS -- produces ICE datasets;

%include "ASMimplibs.sas";


*OPTIONS obs=5000;run;


* PART 1: RAW DATA FOR PRODUCERS OF PRODUCT;
%macro chk(y,p);
 /*
   PART 1: build the raw product-trailer extract for establishments that
   report at least one of the target product codes.

   Parameters
     y : year suffix; used in the input name cmf.cmf&y.prod, in every
         dataset this macro creates, and in the report titles.
     p : quoted, comma-separated list of NAICSPC codes; it is substituted
         directly into "NAICSPC in (&p)".

   Datasets written
     chk&y     : product-trailer records whose NAICSPC is in the list
     est&y     : one record per survu_id found in chk&y
     fhs.ice&y : every product record of those establishments with
                 pv not missing and NAICSPC not blank, plus the flags
                 cflag, pflag and dflag and the NAICSPC substrings
     xx&y      : the dflag=0 records, then narrowed to pv<0

   Comments inside the macro use the slash-star form so that the macro
   processor never tokenizes comment text.
 */

 /* Records in the product trailer file that carry a target code. */
 data chk&y;
   set cmf.cmf&y.prod (keep = survu_id NAICSPC );
   if NAICSPC in (&p);
 run;

 title "&y -- # of product trailer records with NAICSPC=ice";
 proc freq data=chk&y;
   table NAICSPC;
 run;

 title "&y -- product trailer records with NAICSPC=ice";
 proc print data= chk&y;
 run;

 /* One record per establishment. The step is now closed with its own RUN
    instead of relying on the RUN that followed the next TITLE statement. */
 proc sort data=chk&y nodupkey out=est&y;
   by survu_id;
 run;

 title "&y -- # of estabs that have ice in product trailer file";
 title2 "ignore the actual products in this case";
 proc freq data=est&y;
   table NAICSPC;
 run;

 /* Now get all of the product records for these establishments.
    The other products are used later when the PPSR measure is built, so
    records that should not count toward it are flagged with dflag=0:
    codes starting with 9, the balancing / admin codes listed below, and
    records with pv<0. */
  data fhs.ice&y;
   merge est&y(in=one keep=survu_id)
         cmf.cmf&y.prod(keep=survu_id NAICSPC NAICSPC_COL pqs pv pqs_f pv_f);
   by survu_id;
   /* NOTE(review): the BY merge needs cmf.cmf&y.prod to be in survu_id
      order; nothing in this file sorts it -- confirm. */
   if one and pv ne . and NAICSPC ne "";

   /* cflag=1 marks a record whose code is one of the target (ice) codes. */
   if NAICSPC in (&p) then cflag=1;else cflag=0;
   /* pflag=1 marks a record with a positive physical quantity. */
   if pqs>0 then pflag=1;else pflag=0;

   NAICSPC1=substr(NAICSPC,1,1);   /* first character        */
   NAICSPC7=substr(NAICSPC,7,4);   /* characters 7 through 10 */
   NAICSPC8=substr(NAICSPC,1,8);   /* first eight characters */

   if NAICSPC1='9' or NAICSPC8 in ('77000000' , '00093000') OR NAICSPC in ('0009998900' , '0009998000' ) OR NAICSPC7 in ('WYWW' , '0YWW' , '000-' ) or pv<0
   then dflag=0;
   else dflag=1;
  run;

 /* title "&y -- checking nonlegit product codes"; */
  data xx&y;
    set fhs.ice&y;
    if dflag=0;
  run;

  /* proc freq data=xx&y;table NAICSPC;run; */

  /* Keep only the negative-pv rows among the excluded records. */
  data xx&y;
     set xx&y;
     if pv<0;
  run;

  title "&y -- Product trailer records with pv<0 (EXCLUDING PV = .)";
  title2 "For plants that produce ice";
  proc print data=xx&y;
    var survu_id NAICSPC pv;
    where pv ne .;
  run;

  /* These two titles used to say "hardwood flooring"; cflag is derived
     from the ice code list, so they now describe what is tabulated. */
  title "&y -- # of products (and ice vs non-ice)";
  title2 "&y -- for ice-producing plants";
  title3 "cflag=1: ice;  cflag=0: not ice";
  proc freq data=fhs.ice&y;
    table cflag;
  run;

  title "&y -- Edit/impute flags, legit ice products";
  title2 "with pv > 0 ";
  proc freq data=fhs.ice&y;
    table pqs_f pv_f;
    where NAICSPC in (&p) and pv > 0 and dflag=1;
  run;

  title "&y -- # of legit products vs. non-legit or pv<0";
  title2 "dflag=1: legit product codes";
  proc freq data=fhs.ice&y;
    table dflag;
  run;

  title "&y -- # of physical products";
  title2 "pflag=1: product trailer record has physical product data (pqs>0)";
  proc freq data=fhs.ice&y;
    table pflag pflag*dflag;
  run;

%mend;


* Run PART 1 once per year -- the NAICSPC list passed as p differs by year (two codes for 2002 and one code for 2007);
* The list is wrapped in %STR so that its commas are not read as macro parameter separators;
%chk(2002,%STR('3121130111' , '3121130121' ));
%chk(2007,%STR('3121130100' ));


* PART 2: CLEANED UP DATA;
* Exclusion based on low ppsr;
* ppsr calculated only on legit codes;
* and using total product for numerator;

%macro chk2(y,p);
  /*
    PART 2: collapse fhs.ice&y to one record per establishment, build the
    product specialization ratio (ppsr1), a unit price and imputation
    rates, and write the cleaned dataset fhs.icef&y.

    Parameters
      y : year suffix; selects fhs.ice&y as input and names every dataset
          created here.
      p : quoted, comma-separated list of NAICSPC codes, substituted into
          "NAICSPC in (&p)".

    Datasets written
      good&y, totpv&y, cc&y, test&y, prodcnt&y, prod&y, chk1&y, multi&y,
      big&y (work) and fhs.icef&y (final).

    Comments inside the macro use the slash-star form so that the macro
    processor never tokenizes comment text.
  */

  /* Creating PPSR */

  /* 1) Pick only legitimate codes (dflag=1). */
  data good&y;
    set fhs.ice&y;
    if dflag=1;
  run;
  proc sort data=good&y;
    by SURVU_ID;
  run;

  /* 2) Sum up pv over all of the products that the estab has. */
  proc summary data=good&y nway;
    by survu_id;
    var pv;
    output out=totpv&y sum=totpv;
  run;

  /* 3) Create single product of interest (if needed). */
  /* 3.1) Construct plant-level impute flags: a record gets pqs_imp=1 or
          pv_imp=1 when its flag variable holds one of the listed values. */
  data cc&y;
    set good&y;
    if NAICSPC in (&p);
    if pqs_f in (' V','RV',' J','RJ')
    then pqs_imp=1;
    else pqs_imp=0;
    if pv_f in (' H',' L',' J',' B','RL','RB','RH')
    then pv_imp=1;
    else pv_imp=0;
    prodcount=1;                 /* summed later to count records per estab */
    pqs_valimp=pqs*pqs_imp;      /* quantity carried by imputed records     */
    pv_valimp=pv*pv_imp;         /* value carried by imputed records        */
  run;

  /* Diagnostic only: PQS/PV ratio on records with pv>0 and a
     non-missing ratio. */
  data test&y;
    set cc&y;
    if pv>0 then pqspvrat=pqs/pv;
    if pqspvrat ne .;
  run;
  proc sort data=test&y;
    by pqs_imp;
  run;

  proc univariate data=test&y;
    var pqspvrat;
    by pqs_imp;
    title "&y -- distribution of PQS/PV ratio by impute flag";
  run;

  proc sort data=test&y;
    by NAICSPC;
  run;

  proc univariate data=test&y;
    var pqspvrat;
    by NAICSPC;
    WHERE pqs_imp=1;
    title "&y -- distribution of PQS/PV by NAICSPC, imputed PQS cases";
  run;

  /* Establishment-level record count for the products of interest. */
  proc summary data=cc&y nway;
    by survu_id;
    var prodcount;
    output out=prodcnt&y sum=;
  run;

  /* Establishment-level sums of value, quantity and impute measures. */
  proc summary data=cc&y nway;
    by survu_id;
    var pv pqs pqs_imp pv_imp pqs_valimp pv_valimp;
    output out=prod&y sum=;
  run;

  data prod&y;
    merge prodcnt&y prod&y;
    by survu_id;
  run;

  data prod&y;
    set prod&y;
    pqs_imprat = pqs_imp/prodcount;
    label pqs_imprat="Fraction of PQS records (our products only) with imputed data";
    pv_imprat = pv_imp/prodcount;
    label pv_imprat="Fraction of PV records (our products only) with imputed data";
    if pqs>0 then pqs_valimpr = pqs_valimp/pqs; else pqs_valimpr=.;
    label pqs_valimpr = "Proportion of PQS VALUE (our products only) that is imputed";
    if pv>0 then pv_valimpr = pv_valimp/pv; else pv_valimpr=.;
    label pv_valimpr = "Proportion of PV VALUE (our products only) that is imputed";
  run;

  /* 4) Create the price and ppsr. */
  /* totpv&y has a row for every establishment in good&y, while prod&y only
     has rows for establishments with a record in cc&y; an establishment
     missing from prod&y comes out of this merge with pv and pqs missing. */
  data chk1&y;
    merge prod&y totpv&y;
    by survu_id;
    if totpv > 0 then ppsr1=pv/totpv;
    else ppsr1=.;
    label ppsr1="Product Specialization Ratio (version 1)";
    if ppsr1=1 then ttflag=1;else ttflag=0;
    label ttflag="TTFLAG=1 when there is only 1 product produced";
    if pqs>0 then price=pv/pqs;
    label price="Constructed Price=PV/PQS";
  run;

  title "&y -- Price properties";
  title2 "Dollars per short ton";
  proc univariate data=chk1&y;
    var price;
  run;

  /* 5) Properties of PPSR. */
  title "&y -- PPSR1 properties";
  proc univariate data=chk1&y;
    var ppsr1;
  run;

  proc freq data=chk1&y;
    table ttflag;
  run;

  /* Now just looking at PPSR for multi-product estabs. */
  data multi&y;
    set chk1&y;
    if ttflag=0;
  run;

  title2 "Just for multi-product estabs";
  proc univariate data=multi&y;
    var ppsr1;
  run;

  /* 6) Applying exclusion rule based on PPSR & PQS. */
  data big&y;
    set chk1&y;
    if ppsr1>0.50 then ppsrflag=1;else ppsrflag=0;
    /* A missing value is not equal to 0, so the former test "pqs=0" gave
       phyflag=1 to establishments with missing pqs and let them into the
       final dataset with a missing price. Requiring pqs>0 matches pflag in
       PART 1 and the guard used when price is computed above. */
    if pqs>0 then phyflag=1;else phyflag=0;
    label phyflag="PHYFLAG=1 when has physical data";
  run;

  title "&y -- estabs with ppsr1>0.50 & physical data";
  proc freq data=big&y;
    table ppsrflag phyflag;
  run;

  /* NOTE(review): ppsrflag is tabulated above but only phyflag filters the
     final dataset; ppsr1 is kept, so a PPSR cut may be applied downstream
     -- confirm that this is intended. */
  data fhs.icef&y (keep=survu_id price pqs pv ppsr1 phyflag pqs_imprat pv_imprat pqs_valimpr pv_valimpr);
    set big&y;
    if phyflag=1;
  run;

%mend;

* Run PART 2 once per year with the same year-specific NAICSPC lists that were passed to chk above;
%chk2(2002,%STR('3121130111' , '3121130121' ));
%chk2(2007,%STR('3121130100' ));


* PART 3: PROPERTIES OF THE FINAL DATASET;

%macro chk3(y,i);
  /*
    PART 3: descriptive reports on the final dataset fhs.icef&y.

    Parameters
      y : year suffix; selects fhs.icef&y and cmf.cmf&y and names the
          work datasets created here.
      i : industry code; compared as the quoted string "&i" against
          NAICS_NEW.

    Work datasets written
      icef&y, ind&y, tot&y, sum&y, nonar&y, sum2&y
  */

  /* ---- 1) Their industries ------------------------------------------ */
  proc sort data=fhs.icef&y out=icef&y;
    by survu_id;
  run;

  /* Attach NAICS_NEW and keep only establishments present in the final
     dataset. */
  data ind&y;
    merge icef&y (in=infinal)
          cmf.cmf&y (keep=survu_id NAICS_NEW);
    by survu_id;
    if not infinal then delete;
  run;

  title "&y -- NAICS_NEW of final block ice dataset";
  proc freq data=ind&y;
    tables NAICS_NEW;
  run;

  /* ---- 1.1) Imputations of product-trailer data for our products ---- */
  title "&y -- Imputation rates for ice products, prod trailer data";
  proc freq data=fhs.icef&y;
    tables pqs_imprat pv_imprat;
  run;

  title "&y -- Proportions of total PQS and PV value imputed, ice products";
  proc means data=fhs.icef&y N min p10 q1 median mean q3 p90 max;
    var pqs_valimpr pv_valimpr;
  run;

  /* ---- 2) Coverage of the industry ---------------------------------- */
  proc sort data=fhs.icef&y out=icef&y;
    by survu_id;
  run;

  /* Mark every record of the final dataset as belonging to our sample. */
  data icef&y;
    set icef&y;
    ourflag = 1;
  run;

  /* Full merge: rows that come only from cmf.cmf&y have ourflag missing. */
  data tot&y;
    merge icef&y (keep=survu_id ourflag)
          cmf.cmf&y (keep=survu_id NAICS_NEW tvs ar tabbed);
    by survu_id;
  run;

  proc freq data=tot&y;
    tables ourflag;
  run;

  /* Restrict to the requested industry; establishments outside our sample
     get ourflag=0. */
  data tot&y;
    set tot&y;
    if not (NAICS_NEW in ("&i")) then delete;
    if ourflag=. then ourflag=0;
  run;

  title "&y-- our sample vs total sample";
  proc freq data=tot&y;
    tables ourflag ar*ourflag tabbed*ourflag;
  run;

  /* Sum of tvs overall and by ourflag (no NWAY, so the overall row is
     kept in the output). */
  proc summary data=tot&y;
    class ourflag;
    var tvs;
    output out=sum&y sum=;
  run;

  title "&y -- total tvs";
  proc print data=sum&y;
  run;

  /* Same summary restricted to records with ar=0. */
  data nonar&y;
    set tot&y;
    if ar ne 0 then delete;
  run;

  proc summary data=nonar&y;
    class ourflag;
    var tvs;
    output out=sum2&y sum=;
  run;

  title "&y -- non-ar tvs";
  proc print data=sum2&y;
  run;

%mend;


* Run PART 3 once per year -- the second argument is the code that chk3 compares against NAICS_NEW;
%chk3(2002,31211300);
%chk3(2007,31211300);


